
library(dplyr)
library(here)
library(tibble) #to add columns
library(DescTools) #alpha
library(rstatix) #convert_as_factor,  get_summary_stats


# Load the raw Study 1 survey export.
data <- read.csv(here("data-raw", "data_raw_study1.csv"))


# Data Management Study 1 --------------------

## Remove incomplete cases: keep rows with `lastpage` of at least 14.
data <- dplyr::filter(data, lastpage >= 14)

# Keep the total interview time as `duration`, inserted right after
# `datestamp`, before the remaining timing columns are discarded.
data <- add_column(data, duration = data$interviewtime, .after = "datestamp")

# Drop every column whose name matches "^Time" or contains "Time".
# tidyselect helpers ignore case by default, so `interviewtime` itself is
# dropped as well; only the `duration` copy survives.
data <- data %>%
  dplyr::select(-matches("^Time")) %>%
  dplyr::select(-contains("Time"))


# Remove other unnecessary variables (e.g., group names).
data <- dplyr::select(
  data,
  -c(startlanguage, seed, textDanke, submitdate, startdate)
)

# Readable names for the age class and the four self-rated knowledge items.
data <- data %>%
  rename(
    age_class = equation,
    subjective_basic_FL = selfratedknowledge.selfbFL.,
    subjective_advanced_FL = selfratedknowledge.selfaFL.,
    subjective_SFL = selfratedknowledge.selfSFL.,
    subjective_SL = selfratedknowledge.selfSL.
  )


### Self ratings to 0-1
# Rescaled with (x - 1) / 6, which maps 1 to 0 and 7 to 1 (presumably the
# ratings are on a 1-7 scale -- confirm against the questionnaire).
# An alternative x / 7 scaling is kept here, disabled:

#data$subjective_basic_FL <- data$subjective_basic_FL/7
#data$subjective_advanced_FL <- data$subjective_advanced_FL/7
#data$subjective_SFL <- data$subjective_SFL/7
#data$subjective_SL <- data$subjective_SL/7

data <- data %>%
  dplyr::mutate(dplyr::across(
    c(
      subjective_basic_FL, subjective_advanced_FL,
      subjective_SFL, subjective_SL
    ),
    function(rating) (rating - 1) / 6
  ))



### Correction of NAs that are due to exclusive options
#
# Each multiple-choice question below has one exclusive answer option (later
# renamed to `..._dont_know` / `..._none`). Where that option equals 1, NAs on
# the other options of the same question are set to 0. Rows where the
# exclusive option is NA or not 1 are left untouched.

# SDGs question: option 4 is the exclusive one.
for (item in paste0("SLSDGs.", 1:3, ".")) {
  fill_rows <- which(is.na(data[[item]]) & data$SLSDGs.4. == 1)
  data[[item]][fill_rows] <- 0
}

# Assets overall: option 12 is the exclusive one.
for (item in paste0("assetsOverallCurrent.", 1:11, ".")) {
  fill_rows <- which(is.na(data[[item]]) & data$assetsOverallCurrent.12. == 1)
  data[[item]][fill_rows] <- 0
}

# Assets sustainable: option 8 is the exclusive one.
for (item in paste0("assetsSustainable.", 1:7, ".")) {
  fill_rows <- which(is.na(data[[item]]) & data$assetsSustainable.8. == 1)
  data[[item]][fill_rows] <- 0
}

# Do not leave the loop helpers behind in the workspace.
rm(item, fill_rows)




# Recoding
#
# Each `*_correct` dummy is 1 when the response equals the code compared
# against below, 0 for any other response, and NA when the response is
# missing. Every dummy is inserted right after its source column.

data <- data %>%
  ### dummies for correct answers basic FL
  add_column(
    FL_b_compoundinterest_correct = as.numeric(data$FKbcompoundinterest == 1),
    .after = "FKbcompoundinterest"
  ) %>%
  add_column(
    FL_b_RealInterest_correct = as.numeric(data$FKbRealInterest == 3),
    .after = "FKbRealInterest"
  ) %>%
  add_column(
    FL_b_Risk_correct = as.numeric(data$FKbRisk == 2),
    .after = "FKbRisk"
  ) %>%
  ### dummies for correct answers advanced FL
  add_column(
    FL_a_StockMarket_correct = as.numeric(data$FKaStockMarket == 3),
    .after = "FKaStockMarket"
  ) %>%
  add_column(
    FL_a_MutualFunds_correct = as.numeric(data$FKaMutualFunds == 2),
    .after = "FKaMutualFunds"
  ) %>%
  add_column(
    FL_a_Volatility_correct = as.numeric(data$FKaVolatility == 3),
    .after = "FKaVolatility"
  ) %>%
  ### dummies for correct answers ESG literacy
  add_column(
    SL_ClimChange_correct = as.numeric(data$SLClimChange == 2),
    .after = "SLClimChange"
  ) %>%
  add_column(
    SL_Sustainablility_correct = as.numeric(data$SLSustainablility == 3),
    .after = "SLSustainablility"
  ) %>%
  # SDGs: correct only when options 1, 2 and 3 are all selected.
  add_column(
    SL_SDGs_correct = as.numeric(
      data$SLSDGs.1. == 1 & data$SLSDGs.2. == 1 & data$SLSDGs.3. == 1
    ),
    .after = "SLSDGs.4."
  )

# Selecting option 4 always counts as not correct.
data$SL_SDGs_correct[which(data$SLSDGs.4. == 1)] <- 0



# Indices
#
# Each index is the number of correct dummies divided by 3. Because of
# `na.rm = TRUE`, a missing dummy contributes 0 while the denominator stays 3.

data <- add_column(
  data,
  FL_basic = rowSums(
    data[, c(
      "FL_b_compoundinterest_correct",
      "FL_b_RealInterest_correct",
      "FL_b_Risk_correct"
    )],
    na.rm = TRUE
  ) / 3,
  .after = "FL_b_Risk_correct"
)

data <- add_column(
  data,
  FL_advanced = rowSums(
    data[, c(
      "FL_a_MutualFunds_correct",
      "FL_a_StockMarket_correct",
      "FL_a_Volatility_correct"
    )],
    na.rm = TRUE
  ) / 3,
  .after = "FL_a_Volatility_correct"
)

data <- add_column(
  data,
  sust_literacy = rowSums(
    data[, c(
      "SL_ClimChange_correct",
      "SL_SDGs_correct",
      "SL_Sustainablility_correct"
    )],
    na.rm = TRUE
  ) / 3,
  .after = "SL_Sustainablility_correct"
)


# Rename
#
# One pipeline, one rename() step per topic. Renaming never moves a column,
# so only the names change.

data <- data %>%
  # financial literacy items
  rename(
    FL_b_compound_interest = FKbcompoundinterest,
    FL_b_real_interest = FKbRealInterest,
    FL_b_risk = FKbRisk,
    FL_a_mutual_funds = FKaMutualFunds,
    FL_a_stock_market = FKaStockMarket,
    FL_a_volatility = FKaVolatility
  ) %>%
  # sustainability literacy items
  rename(
    SL_clim_change = SLClimChange,
    SL_SDGs_health = SLSDGs.1.,
    SL_SDGs_climate = SLSDGs.2.,
    SL_SDGs_partnership = SLSDGs.3.,
    SL_SDGs_dont_know = SLSDGs.4.,
    SL_sustainability = SLSustainablility
  ) %>%
  # attitudes and the three attention checks
  rename(
    hassle_inv = attitudeFinance.SQ002.,
    hassle_sust = attitudeFinance.SQ003.,
    attention_check_3 = attitudeFinance.SQ004.,
    sust_lifestyle = attitudeSustainabili.SQ002.,
    climate_awareness = attitudeSustainabili.SQ003.,
    attention_check_1 = SFLgroup1.SFL99.,
    attention_check_2 = SFLgroup2.SFL98.
  ) %>%
  # background variables
  rename(
    assets_sust_perc = InvSustPerc,
    education_economics = econEducation,
    prof_experience = profExper,
    hh_income = hhincome
  ) %>%
  # assets currently held
  rename(
    assets_savings_book = assetsOverallCurrent.1.,
    assets_current_account = assetsOverallCurrent.2.,
    assets_building_loan_contract = assetsOverallCurrent.3.,
    assets_cash = assetsOverallCurrent.4.,
    assets_saving_plan = assetsOverallCurrent.5.,
    assets_funds = assetsOverallCurrent.6.,
    assets_single_stocks = assetsOverallCurrent.7.,
    assets_single_bonds = assetsOverallCurrent.8.,
    assets_cryptocurrencies = assetsOverallCurrent.9.,
    assets_retirement_saving = assetsOverallCurrent.10.,
    assets_other_gold_immo = assetsOverallCurrent.11.,
    assets_none = assetsOverallCurrent.12.
  ) %>%
  # sustainable assets currently held
  rename(
    assets_sust_savings_book = assetsSustainable.1.,
    assets_sust_current_account = assetsSustainable.2.,
    assets_sust_saving_plan = assetsSustainable.3.,
    assets_sust_funds = assetsSustainable.4.,
    assets_sust_single_stock = assetsSustainable.5.,
    assets_sust_single_bond = assetsSustainable.6.,
    assets_sust_retirement_saving = assetsSustainable.7.,
    assets_sust_none = assetsSustainable.8.
  )



### Stock market participants
#
# A respondent counts as a participant (1) when funds, single bonds or single
# stocks are held; 0 when none of the three equals 1; NA when that cannot be
# decided because of missing values.

data <- add_column(
  data,
  stock_market_participant = as.numeric(
    data$assets_funds == 1 |
      data$assets_single_bonds == 1 |
      data$assets_single_stocks == 1
  ),
  .after = "assets_none"
)

# Same rule applied to the sustainable counterparts of the three asset types.
data <- add_column(
  data,
  sustainable_stock_market_participant = as.numeric(
    data$assets_sust_funds == 1 |
      data$assets_sust_single_bond == 1 |
      data$assets_sust_single_stock == 1
  ),
  .after = "assets_sust_none"
)


## Variables for failing attention checks
#
# A check is passed (flag 0) when the expected response was given: 5 for
# check 1, 1 for checks 2 and 3. Any other response fails it (flag 1); a
# missing response leaves the flag NA. The three flags go in right after
# `comment`.

data <- add_column(
  data,
  attention_check_1_failed = as.numeric(data$attention_check_1 != 5),
  attention_check_2_failed = as.numeric(data$attention_check_2 != 1),
  attention_check_3_failed = as.numeric(data$attention_check_3 != 1),
  .after = "comment"
)

# Put every raw check directly in front of its flag. Resulting order:
# comment, check 1, flag 1, check 2, flag 2, check 3, flag 3.
data <- data %>%
  relocate(attention_check_1, .after = comment) %>%
  relocate(attention_check_2, .after = attention_check_1_failed) %>%
  relocate(attention_check_3, .after = attention_check_2_failed)




# SFL renaming and recoding
#
# The 30 SFL items are renamed from their survey export names to
# `i<k>_SFL_<label>` and moved, in item order, directly in front of
# `FL_b_compound_interest`. Items whose new name ends in `_recoded` are
# reverse-scored (1 <-> 5, 2 <-> 4; 3 and 6 keep their value).
#
# All item definitions live in one lookup vector (new name = export name), so
# every name is typed exactly once instead of several times per item.

sfl_lookup <- c(
  i1_SFL_ESG_meaning = "SFLgroup1.SFL1.",
  i2_SFL_ESG_strategy_recoded = "SFLgroup3.SFL2.",
  i3_SFL_greenwashing_meaning = "SFLgroup3.SFL3.",
  i4_SFL_labels = "SFLgroup3.SFL4.",
  i5_SFL_taxonomy_meaning = "SFLgroup2.SFL5.",
  i6_SFL_criteria_SI = "SFLgroup2.SFL6.",
  i7_SFL_preference_elicitation = "SFLgroup2.SFL7.",
  i8_SFL_ratings_recoded = "SFLgroup3.SFL8.",
  i9_SFL_name_recoded = "SFLgroup1.SFL9.",
  i10_SFL_greenwashing_identification = "SFLgroup5.SFL10.",
  i11_SFL_documents = "SFLgroup4.SFL11.",
  i12_SFL_exclusion_criteria = "SFLgroup2.SFL12.",
  i13_SFL_best_in_class = "SFLgroup5.SFL13.",
  i14_SFL_divestment = "SFLgroup2.SFL14.",
  i15_SFL_individual_strategy = "SFLgroup4.SFL15.",
  i16_SFL_products = "SFLgroup4.SFL16.",
  i17_SFL_product_purchase_recoded = "SFLgroup5.SFL17.",
  i18_SFL_product_increase = "SFLgroup1.SFL18.",
  i19_SFL_fees_recoded = "SFLgroup5.SFL19.",
  i20_SFL_impact_economy = "SFLgroup5.SFL20.",
  i21_SFL_impact_environment_recoded = "SFLgroup3.SFL21.",
  i22_SFL_engagement = "SFLgroup1.SFL22.",
  i23_SFL_impact_investments_recoded = "SFLgroup1.SFL23.",
  i24_SFL_sustainability_risk = "SFLgroup5.SFL24.",
  i25_SFL_stranded_assets = "SFLgroup2.SFL25.",
  i26_SFL_performance_recoded = "SFLgroup4.SFL26.",
  i27_SFL_costs_performance_recoded = "SFLgroup4.SFL27.",
  i28_SFL_green_loans = "SFLgroup1.SFL28.",
  i29_SFL_saving_sustainable = "SFLgroup3.SFL29.",
  i30_SFL_banks_recoded = "SFLgroup4.SFL30."
)

# Reverse-scored items: items 2, 8, 9, 17, 19, 21, 23, 26, 27 and 30.
sfl_reversed <- grep("_recoded$", names(sfl_lookup), value = TRUE)

# Raw response distributions, labelled with the export names. Printed
# explicitly so they also appear when the script is run through source().
print(lapply(data[unname(sfl_lookup)], table))

data <- data %>%
  # rename(): `all_of()` on a named vector renames old -> new and fails
  # loudly if any export column is missing.
  rename(dplyr::all_of(sfl_lookup)) %>%
  # Reverse-score; recode() yields character codes, hence as.numeric().
  dplyr::mutate(dplyr::across(
    dplyr::all_of(sfl_reversed),
    function(item) {
      as.numeric(dplyr::recode(
        item,
        "1" = "5", "2" = "4", "3" = "3", "4" = "2", "5" = "1", "6" = "6"
      ))
    }
  )) %>%
  # Columns are moved in the order given, so they end up as i1 ... i30.
  relocate(dplyr::all_of(names(sfl_lookup)), .before = FL_b_compound_interest)

# Distributions after renaming and reverse-scoring, labelled with new names.
print(lapply(data[names(sfl_lookup)], table))

rm(sfl_lookup, sfl_reversed)


#### Replace "don't know" with NA
#
# Response code 6 becomes NA on all 30 SFL items. The items form one
# contiguous column block from `i1_SFL_ESG_meaning` to
# `i30_SFL_banks_recoded`, because each was relocated in item order in front
# of `FL_b_compound_interest`, so a column range selects exactly them.
data <- data %>%
  dplyr::mutate(dplyr::across(
    i1_SFL_ESG_meaning:i30_SFL_banks_recoded,
    function(item) replace(item, item == 6, NA)
  ))



### SFL recode to correctly/falsely answered
#
# For every SFL item a dummy `i<k>_correct` is appended to the end of `data`:
# 1 when the (possibly reverse-scored) response is 4 or 5, 0 for any other
# non-missing response, NA when the response is NA. `==` is used on purpose:
# `%in%` would turn NA into 0.
sfl_items <- names(data)[
  which(names(data) == "i1_SFL_ESG_meaning"):
  which(names(data) == "i30_SFL_banks_recoded")
]

for (sfl_item in sfl_items) {
  # "i12_SFL_exclusion_criteria" -> "i12_correct"
  data[[sub("_SFL_.*$", "_correct", sfl_item)]] <-
    as.numeric(data[[sfl_item]] == 4 | data[[sfl_item]] == 5)
}

rm(sfl_items, sfl_item)



## Data Exclusion
#
# 1. Drop respondents whose 30 SFL items sum to zero with NAs ignored
#    (with items coded 1-5 this means every item is NA).
# 2. Drop respondents who failed attention checks 1 and 3.
# 3. Drop respondents who failed attention checks 2 and 3 (evaluated on the
#    rows remaining after step 2).
#
# Steps 2 and 3 use logical keep-masks rather than `data[-which(cond), ]`.
# When no row matches, `which()` returns integer(0), and indexing with a
# negated empty vector selects zero rows, which would empty the data set.
# Rows where the condition is NA are kept, exactly as with `which()`.

sfl_first <- which(colnames(data) == "i1_SFL_ESG_meaning")
sfl_last <- which(colnames(data) == "i30_SFL_banks_recoded")
sfl_sum_zero <- rowSums(data[, sfl_first:sfl_last], na.rm = TRUE) == 0

sum(sfl_sum_zero) # number of respondents without any usable SFL response
data <- dplyr::filter(data, !sfl_sum_zero)

failed_1_and_3 <- data$attention_check_1_failed == 1 &
  data$attention_check_3_failed == 1
sum(failed_1_and_3, na.rm = TRUE) # number of respondents excluded
data <- data[!(failed_1_and_3 %in% TRUE), ]

failed_2_and_3 <- data$attention_check_2_failed == 1 &
  data$attention_check_3_failed == 1
sum(failed_2_and_3, na.rm = TRUE) # number of respondents excluded
data <- data[!(failed_2_and_3 %in% TRUE), ]

rm(sfl_first, sfl_last, sfl_sum_zero, failed_1_and_3, failed_2_and_3)




# Replace "no response" with NA: education codes 9 and 10.
table(data$education)
data$education[data$education %in% c(9, 10)] <- NA

## No income recoded: household income code 10 becomes NA.
table(data$hh_income)
data$hh_income[data$hh_income %in% 10] <- NA


# From here on the Study 1 data lives in `data_study1`; the income column is
# renamed back to `hhincome`.
data_study1 <- rename(data, hhincome = hh_income)
rm(data)

# Flag respondents without a reported household income (1 = not reported).
table(data_study1$hhincome)
data_study1 <- add_column(
  data_study1,
  hhincome_not_reported = as.numeric(is.na(data_study1$hhincome)),
  .after = "hhincome"
)
table(data_study1$hhincome_not_reported)

table(data_study1$hhincome)
median(data_study1$hhincome, na.rm = TRUE)

# High income: income category above 4. A missing income counts as
# not high (0). The dummy goes directly after `hhincome`.
high_income <- as.numeric(data_study1$hhincome > 4)
table(high_income)
data_study1 <- add_column(data_study1, high_income, .after = "hhincome")
data_study1$high_income[is.na(data_study1$high_income)] <- 0

rm(high_income)



# New variables

# A missing sustainable stock market participation is treated as 0.
data_study1$sustainable_stock_market_participant[
  is.na(data_study1$sustainable_stock_market_participant)
] <- 0

# University degree: 1 for education code 7 or 8, otherwise 0. `%in%` maps a
# missing education to 0 as well.
data_study1$university_degree <-
  as.numeric(data_study1$education %in% c(7, 8))


# SFL scores: share of correctly answered items within an item set.
# NA dummies count as not correct (`na.rm = TRUE`), while the denominator is
# always the full number of items in the set.
sfl_share <- function(item_numbers) {
  item_cols <- paste0("i", item_numbers, "_correct")
  rowSums(data_study1[, item_cols], na.rm = TRUE) / length(item_numbers)
}

# All scores are inserted after `i30_correct` in the order
# SFL30, SFL7, SFL5, SFL4, SFL3, SFL27.
data_study1 <- add_column(
  data_study1,
  SFL30 = sfl_share(1:30),
  SFL7 = sfl_share(c(3, 4, 5, 12, 16, 19, 26)),
  SFL5 = sfl_share(c(3, 12, 16, 19, 26)),
  SFL4 = sfl_share(c(3, 12, 16, 26)),
  SFL3 = sfl_share(c(3, 12, 26)),
  # all items except 9, 21 and 23
  SFL27 = sfl_share(c(1:8, 10:20, 22, 24:30)),
  .after = "i30_correct"
)

rm(sfl_share)



# Data management Study 2 --------------

# Read data study2 ---------------

data <- read.csv(here("data-raw", "data_raw_without_mail_study2.csv"))


# Filter incomplete ---------
table(data$lastpage)
table(data$attentioncheck1)

# Respondents who stopped before page 10 are kept in a separate data frame.
data_incomplete <- dplyr::filter(data, lastpage < 10)
table(data_incomplete$lastpage)

# The analysis data keeps respondents who reached at least page 10.
data <- dplyr::filter(data, lastpage >= 10)

# Distributions in the complete cases, for visual inspection.
table(data$manipcheckgw)
table(data$lastpage)
table(data$attentioncheck1)
table(data$badquality)
table(data$email_not_provided)


# Remove variables ----

data <- dplyr::select(
  data,
  -c(
    funds1:funds8, startlanguage, seed, InfoLink, Hinweis,
    G02Q60, fundsCopy, Debriefing, revisedecision,
    fundsCopyCopy, fondscopy11, warmglow
  )
)

# Timings: create df with timings -----
#
# `timings` holds `duration` (the renamed interview time) followed by the
# per-group timing columns, plus `overall`: the row sum of the group timings
# divided by 60 and rounded to two decimals (presumably seconds converted to
# minutes -- confirm against the survey export).

data <- rename(data, duration = interviewtime)

# contains() ignores case by default; only the "groupTime" columns are kept
# next to `duration`.
timings <- dplyr::select(data, c(duration, contains("groupTime")))

# `timings[, -1, drop = FALSE]` selects everything except `duration` and stays
# a data frame. `2:ncol(timings)` would count `duration` itself with zero
# group columns (2:1) and drop to a vector with a single one.
timings <- add_column(
  timings,
  overall = round(rowSums(timings[, -1, drop = FALSE], na.rm = TRUE) / 60, 2),
  .after = "duration"
)



# Check timings of participants ------------

# Disabled exploratory checks on the overall duration:
#max(data$duration)
#min(data$duration)

#mean(data$duration)
#sd(data$duration)
median(data$duration)

#x <- data %>% dplyr::filter(data$duration > mean(data$duration) + 
                   #           2*sd(data$duration))
#x$duration

#rm(x)
#which(data$duration >= mean(data$duration) + 2*sd(data$duration))


# Investment page (groupTime100): rows with a timing below 30 are flagged
# with 1 in `timing_investment` (the unit is presumably seconds -- confirm).
which(data$groupTime100 < 30)
mean(data$groupTime100)
sd(data$groupTime100)

data[["timing_investment"]] <- as.numeric(data$groupTime100 < 30)
table(data$timing_investment)


# SFL page (groupTime109): same rule, flag stored in `timing_sfl`.
which(data$groupTime109 < 30)
mean(data$groupTime109)
sd(data$groupTime109)

data[["timing_sfl"]] <- as.numeric(data$groupTime109 < 30)
table(data$timing_sfl)


# Revised investment (groupTime157) -- disabled:
#which(data$groupTime157 < 30)
#mean(data$groupTime157,na.rm=T)
#sd(data$groupTime157,na.rm=T)
#data$timing_rev_inv <- ifelse(data$groupTime157 < 30, 1, 0)
#table(data$timing_rev_inv)

# Cross-tabulation of the two flags.
table(data$timing_investment, data$timing_sfl)




# remove all timings except interview time
# (every column whose name contains "Time", case-insensitively, is dropped)

data <- dplyr::select(data, -contains("Time"))


# rename variables ----------

# The per-slot answers "<stem>.<slot>." become "<stem>_<slot>_old" so that the
# recoded versions can later take the plain "<stem>_<slot>" names.
item_stems <- c("investmentdecision", "manipcheckrating", "revisedinvestment")
slot_letters <- c("A", "B", "C", "D", "S")

raw_names <- paste0(
  rep(item_stems, each = length(slot_letters)), ".",
  rep(slot_letters, times = length(item_stems)), "."
)
old_names <- paste0(
  rep(item_stems, each = length(slot_letters)), "_",
  rep(slot_letters, times = length(item_stems)), "_old"
)

data <- data %>%
  rename(all_of(setNames(raw_names, old_names))) %>%
  rename(manipcheckgw_old = manipcheckgw)

rm(item_stems, slot_letters, raw_names, old_names)



## Recoding randomization of funds  --------

# Answers were recorded per slot (A, B, C, D, S); which slot feeds which
# recoded column depends on `randnumber`. List position k holds the mapping for
# randnumber == k: the element name is the recoded column suffix, the value is
# the "_old" slot it is copied from. Slot S is never permuted.
fund_source <- list(
  c(A = "A", B = "B", C = "C", D = "D", S = "S"), # randnumber 1 (identity)
  c(A = "A", B = "D", C = "C", D = "B", S = "S"), # randnumber 2
  c(A = "B", B = "A", C = "D", D = "C", S = "S"), # randnumber 3
  c(A = "D", B = "A", C = "B", D = "C", S = "S"), # randnumber 4
  c(A = "C", B = "B", C = "A", D = "D", S = "S"), # randnumber 5
  c(A = "C", B = "D", C = "A", D = "B", S = "S"), # randnumber 6
  c(A = "B", B = "C", C = "D", D = "A", S = "S"), # randnumber 7
  c(A = "D", B = "C", C = "B", D = "A", S = "S")  # randnumber 8
)

# Adds the recoded columns for one randomisation group.
#   rows:   data frame holding the rows of a single randnumber
#   source: named character vector, recoded suffix -> "_old" slot
# Returns `rows` with investmentdecision_*, revisedinvestment_*,
# manipcheckrating_* (A, B, C, D, S each) and manipcheckgw appended, in that
# order.
recode_fund_order <- function(rows, source) {
  stems <- c("investmentdecision", "revisedinvestment", "manipcheckrating")
  for (stem in stems) {
    for (fund in names(source)) {
      rows[[paste0(stem, "_", fund)]] <-
        rows[[paste0(stem, "_", source[[fund]], "_old")]]
    }
  }
  # manipcheckgw stores a slot label, so it needs the inverse lookup
  # (old slot -> recoded label). Values without a match (e.g. "") are kept
  # unchanged by recode().
  slot_to_fund <- setNames(names(source), source)
  rows$manipcheckgw <- dplyr::recode(rows$manipcheckgw_old, !!!slot_to_fund)
  rows
}

# Rows without a known randomisation cannot be recoded and are left out of the
# recombined data below; make that visible instead of dropping them silently.
unassigned <- !(data$randnumber %in% seq_along(fund_source))
if (any(unassigned)) {
  warning(
    sum(unassigned), " row(s) have a randnumber outside 1-",
    length(fund_source), " and are dropped by the recoding.",
    call. = FALSE
  )
}

data <- data[order(data$randnumber), ]

# Recode every randomisation group separately and stack the groups again in
# ascending randnumber order.
data <- do.call(
  rbind,
  lapply(seq_along(fund_source), function(k) {
    recode_fund_order(dplyr::filter(data, randnumber == k), fund_source[[k]])
  })
)

rm(fund_source, recode_fund_order, unassigned)


#data <- data %>% dplyr::filter(revisedinvestment_A != 102) ###### delete


# `check` keeps the original (_old) and the recoded fund variables side by side
# so that the recoding can be verified per randnumber.
check <- dplyr::select(
  data,
  randnumber,
  investmentdecision_A_old:investmentdecision_S_old,
  investmentdecision_A:investmentdecision_S,
  revisedinvestment_A_old:revisedinvestment_S_old,
  revisedinvestment_A:revisedinvestment_S,
  manipcheckgw_old, manipcheckgw,
  manipcheckrating_A_old:manipcheckrating_S_old,
  manipcheckrating_A:manipcheckrating_S
)

#check <- check %>%
 # dplyr::filter(investmentdecision_A_old!= 500)


# One subset per randomisation for manual inspection; removed again right away.
check1 <- dplyr::filter(check, randnumber == 1)
check2 <- dplyr::filter(check, randnumber == 2)
check3 <- dplyr::filter(check, randnumber == 3)
check4 <- dplyr::filter(check, randnumber == 4)
check5 <- dplyr::filter(check, randnumber == 5)
check6 <- dplyr::filter(check, randnumber == 6)
check7 <- dplyr::filter(check, randnumber == 7)
check8 <- dplyr::filter(check, randnumber == 8)

rm(check1, check2, check3, check4, check5, check6, check7, check8)

# The _old columns are no longer needed in `data` (they remain in `check`).
data <- dplyr::select(
  data,
  -c(
    investmentdecision_A_old:investmentdecision_S_old,
    revisedinvestment_A_old:revisedinvestment_S_old,
    manipcheckgw_old,
    manipcheckrating_A_old:manipcheckrating_S_old
  )
)


data <- relocate(data, duration, .after = submitdate)




## Rename, relocate ------------

# Place the recoded fund variables, in exactly this order, directly in front of
# reasonproSIasset.
data <- relocate(
  data,
  investmentdecision_A, investmentdecision_B, investmentdecision_C,
  investmentdecision_D, investmentdecision_S,
  revisedinvestment_A, revisedinvestment_B, revisedinvestment_C,
  revisedinvestment_D, revisedinvestment_S,
  manipcheckrating_A, manipcheckrating_B, manipcheckrating_C,
  manipcheckrating_D, manipcheckrating_S,
  manipcheckgw,
  .before = reasonproSIasset
)

# Readable names for the export column names (new name = "export name").
readable_names <- c(
  age_class = "equation",
  university_degree = "equationuni",
  learning_videos = "learningstyle.1.",
  learning_text = "learningstyle.2.",
  learning_quiz = "learningstyle.3.",
  learning_not = "learningstyle.4.",
  learning_other = "learningstyle.other.",
  SFL_confidence = "confidenceSFL",
  SHI_greedy = "stockholderimage.SHIgreedy.",
  SHI_gambler = "stockholderimage.SHIgambler.",
  SHI_selfish = "stockholderimage.SHIselfish.",
  SMI_immorality = "stockmarketimage.SMIimm.",
  SMI_wealth_creation = "stockmarketimage.SMIwcc.",
  SMI_esg = "stockmarketimage.SMIesg.",
  subjective_ability_SFL = "subjectiveabilitySFL",
  hassle_sust = "evalsustainable.hasslesust.",
  gw_belief = "evalsustainable.greenwashingbelief.",
  check_gw = "checkgw",
  values_bio_unity = "values.valuesbiounity.",
  values_bio_protect = "values.valuesbioprotect.",
  values_alt_justice = "values.valuesaltjustice.",
  values_alt_equality = "values.valuesaltequality.",
  trust_general = "trustgeneral",
  pol_right = "polspect.polspect.",
  stock_market_participant = "stockmarketparticipa",
  assets_percent = "assetsperc",
  assets_sust_perc = "assetssustperc",
  risk_taking = "risktaking"
)

data <- rename(data, all_of(readable_names))
rm(readable_names)



# Check for missing values in the time stamp and ID before dropping the other
# date columns.
anyNA(data$datestamp)
anyNA(data$talkOnline_ID)

data <- dplyr::select(data, -c(startdate, submitdate))




# make uni degree numeric
# (keeps only the first character of each value and converts it to a number)

anyNA(data$university_degree)
table(data$university_degree)

data$university_degree <- as.numeric(substr(data$university_degree, 1, 1))

table(data$university_degree)



## SFL index -------------------

# Scoring key: the response code that is scored as correct for each SFL item.
sfl_key <- c(
  SFLi3 = 1, SFLi4 = 1, SFLi5 = 1, SFLi12 = 1, SFLi16 = 1,
  SFLi19 = 0, SFLi26 = 0
)
sfl_items <- names(sfl_key)
sfl_scored <- paste0(sfl_items, "_correct")

# <item>_correct is 1 for the keyed response, 0 otherwise (NA stays NA).
for (sfl_item in sfl_items) {
  data[[paste0(sfl_item, "_correct")]] <-
    ifelse(data[[sfl_item]] == sfl_key[[sfl_item]], 1, 0)
}

# Share of correct answers over all seven items, and over the five items that
# remain without SFLi4 and SFLi5.
data$sfl <- rowMeans(data[, sfl_scored], na.rm = TRUE)
data$sfl5 <- rowMeans(
  data[, setdiff(sfl_scored, c("SFLi4_correct", "SFLi5_correct"))],
  na.rm = TRUE
)

# Put every scored column right behind its item, then the index behind the
# last scored column.
for (sfl_item in sfl_items) {
  data <- relocate(
    data,
    all_of(paste0(sfl_item, "_correct")),
    .after = all_of(sfl_item)
  )
}
data <- relocate(data, sfl, .after = SFLi26_correct)

# 1 if all seven items were answered with code 9, 0 otherwise.
all_code_9 <- Reduce(`&`, lapply(sfl_items, function(item) data[[item]] == 9))
sfl_only_dk <- ifelse(all_code_9, 1, 0)
data <- add_column(data, sfl_only_dk, .after = "SFL_confidence")

table(data$sfl_only_dk)

rm(sfl_key, sfl_items, sfl_scored, sfl_item, all_code_9)


## SFL confidence weighted and overconfidence

# Rescale the confidence rating with (x - 1) / 6, then subtract the share of
# correct SFL answers.
data$SFL_confidence_0_1 <- (data$SFL_confidence - 1) / 6
data$overconfidence <- data$SFL_confidence_0_1 - data$sfl

# Overall mean and means for gender codes 1 and 2.
mean(data$overconfidence)
mean(data$overconfidence[data$gender %in% 1])
mean(data$overconfidence[data$gender %in% 2])


#x <- data %>% subset(select = c(sfl, SFL_confidence,SFL_confidence_0_1, 
 #                               overconfidence))


summary(data$SFL_confidence)

# SFL score weighted by the rescaled confidence.
SFL_weighted <- data$SFL_confidence_0_1 * data$sfl
data <- add_column(data, SFL_weighted, .after = "sfl_only_dk")






## aFL index -----------------

# Score the three advanced financial-literacy items: 1 for the keyed response
# code, 0 otherwise (NA stays NA).
FLastockmarket_correct <- as.numeric(data$FLastockmarket == 3)
FLamutualfunds_correct <- as.numeric(data$FLamutualfunds == 2)
FLavolatility_correct <- as.numeric(data$FLavolatility == 3)

# Insert each scored column right behind its item.
data <- data %>%
  add_column(FLastockmarket_correct, .after = "FLastockmarket") %>%
  add_column(FLamutualfunds_correct, .after = "FLamutualfunds") %>%
  add_column(FLavolatility_correct, .after = "FLavolatility")

rm(FLastockmarket_correct, FLamutualfunds_correct, FLavolatility_correct)

# Share of correct answers across the three items.
afl_scored <- c(
  "FLastockmarket_correct", "FLamutualfunds_correct", "FLavolatility_correct"
)
FL_advanced <- rowMeans(data[, afl_scored], na.rm = TRUE)
data <- add_column(data, FL_advanced, .after = "FLastockmarket_correct")
rm(afl_scored)

# 1 if all three items carry the codes 4 / 5 / 4 (per the variable name the
# "don't know" options), 0 otherwise.
FLa_only_dk <- as.numeric(
  data$FLastockmarket == 4 & data$FLamutualfunds == 5 & data$FLavolatility == 4
)
data <- add_column(data, FLa_only_dk, .after = "FL_advanced")


table(data$sfl_only_dk, data$FLa_only_dk)



## Bio values index ------------

# Row mean of the two biospheric value items, followed by their alpha.
bio_items <- c("values_bio_protect", "values_bio_unity")
bio_values <- rowMeans(data[, bio_items], na.rm = TRUE)
data <- add_column(data, bio_values, .after = "values_bio_protect")

CronbachAlpha(
  data.frame(data$values_bio_protect, data$values_bio_unity),
  na.rm = TRUE
)


## Alt values index ------------

# Row mean of the two altruistic value items, followed by their alpha.
alt_items <- c("values_alt_equality", "values_alt_justice")
alt_values <- rowMeans(data[, alt_items], na.rm = TRUE)
data <- add_column(data, alt_values, .after = "values_alt_equality")

CronbachAlpha(
  data.frame(data$values_alt_equality, data$values_alt_justice),
  na.rm = TRUE
)

rm(bio_items, alt_items)


## Stock holder index ------------

# Mean of the three stockholder-image items. rowMeans(na.rm = TRUE) averages
# over the items a respondent actually answered, consistent with the other
# indices in this script. The former rowSums(..., na.rm = TRUE) / 3 counted
# every missing item as 0 and thereby deflated the score. A row with all three
# items missing now yields NaN instead of 0.
SHI <- rowMeans(
  data[, c("SHI_gambler", "SHI_greedy", "SHI_selfish")],
  na.rm = TRUE
)
data <- add_column(data, SHI, .after = "SHI_selfish")


# Internal consistency of the three items.
CronbachAlpha(
  data.frame(data$SHI_gambler, data$SHI_greedy, data$SHI_selfish),
  na.rm = TRUE
)


## Stock market index ------------

mean(data$SMI_esg)
mean(data$SMI_immorality)
mean(data$SMI_wealth_creation)

# Reverse the 7-point wealth-creation item (1 <-> 7, 2 <-> 6, 3 <-> 5, 4 stays).
table(data$SMI_wealth_creation)
reverse_7pt <- setNames(as.character(7:1), as.character(1:7))
data$SMI_wealth_creation <- as.numeric(
  dplyr::recode(data$SMI_wealth_creation, !!!reverse_7pt)
)
rm(reverse_7pt)
table(data$SMI_wealth_creation)


# Row mean of the three stock-market-image items (after the reversal above).
smi_items <- c("SMI_esg", "SMI_immorality", "SMI_wealth_creation")
SMI <- rowMeans(data[, smi_items], na.rm = TRUE)
data <- add_column(data, SMI, .after = "SMI_esg")
rm(smi_items)


# Internal consistency, once via DescTools and once via psych.
CronbachAlpha(
  data.frame(data$SMI_esg, data$SMI_immorality, data$SMI_wealth_creation),
  na.rm = TRUE
)
psych::alpha(
  data.frame(data$SMI_esg, data$SMI_immorality, data$SMI_wealth_creation),
  na.rm = TRUE
)




## Stock market participation investment task --------------

# Amounts placed in funds A-D and in funds C and D only; missing amounts are
# treated as 0 by the row sums.
invested_abcd <- rowSums(
  data[, paste0("investmentdecision_", c("A", "B", "C", "D"))],
  na.rm = TRUE
)
invested_cd <- rowSums(
  data[, paste0("investmentdecision_", c("C", "D"))],
  na.rm = TRUE
)

# Amount in funds A-D divided by 500.
SMP_investment_decision <- invested_abcd / 500
data <- add_column(data, SMP_investment_decision, .after = "investmentdecision_S")



## Sustainable investments investment task ------------

# Funds C + D as a share of funds A-D; set to 0 where nothing went into A-D
# (the division gives NaN there).
SI_investment_decision <- invested_cd / invested_abcd
data <- add_column(
  data, SI_investment_decision,
  .after = "SMP_investment_decision"
)

data$SI_investment_decision[which(data$SMP_investment_decision == 0)] <- 0

# Rows that invested in A-D but still have a missing SI share (none expected).
which(data$SMP_investment_decision != 0 & is.na(data$SI_investment_decision))


## Greenwashed investment investment task --------------

# Fund C as a share of funds C + D; set to 0 where the SMP or the SI share is 0.
GW_investment_decision <- data$investmentdecision_C / invested_cd
data <- add_column(
  data, GW_investment_decision,
  .after = "SI_investment_decision"
)

data$GW_investment_decision[which(data$SMP_investment_decision == 0)] <- 0
data$GW_investment_decision[which(data$SI_investment_decision == 0)] <- 0


## Savings quote -----------

# Amount in slot S divided by 500.
savings_quote <- data$investmentdecision_S / 500
data <- add_column(data, savings_quote, .after = "GW_investment_decision")

rm(invested_abcd, invested_cd)



## Revised Stock market participation investment task --------------

# Revised amounts in funds A-D and in funds C and D only; missing amounts are
# treated as 0 by the row sums.
revised_abcd <- rowSums(
  data[, paste0("revisedinvestment_", c("A", "B", "C", "D"))],
  na.rm = TRUE
)
revised_cd <- rowSums(
  data[, paste0("revisedinvestment_", c("C", "D"))],
  na.rm = TRUE
)

# Revised amount in funds A-D divided by 500.
SMP_revised_investment <- revised_abcd / 500
data <- add_column(data, SMP_revised_investment, .after = "revisedinvestment_S")


## Revised  Sustainable investments investment task ------------

# Funds C + D as a share of funds A-D; set to 0 where nothing went into A-D.
SI_revised_investment <- revised_cd / revised_abcd
data <- add_column(
  data, SI_revised_investment,
  .after = "SMP_revised_investment"
)

data$SI_revised_investment[which(data$SMP_revised_investment == 0)] <- 0


## Revised  Greenwashed investment investment task --------------

# Fund C as a share of funds C + D; set to 0 where the SMP or the SI share is 0.
GW_revised_investment <- data$revisedinvestment_C / revised_cd
data <- add_column(
  data, GW_revised_investment,
  .after = "SI_revised_investment"
)

data$GW_revised_investment[which(data$SMP_revised_investment == 0)] <- 0
data$GW_revised_investment[which(data$SI_revised_investment == 0)] <- 0


## Savings quote --------

# Revised amount in slot S divided by 500.
savings_quote_revised <- data$revisedinvestment_S / 500
data <- add_column(
  data, savings_quote_revised,
  .after = "GW_revised_investment"
)

rm(revised_abcd, revised_cd)



# All differences below are "revised minus initial", so a positive value means
# the revised decision is larger than the initial one.

## Diff SMP Before-After

diff_SMP <- data$SMP_revised_investment - data$SMP_investment_decision
data <- add_column(data, diff_SMP, .after = "savings_quote_revised")



## Diff SI Before-After

diff_SI <- data$SI_revised_investment - data$SI_investment_decision
data <- add_column(data, diff_SI, .after = "diff_SMP")


## Diff GW Before-After

diff_GW <- data$GW_revised_investment - data$GW_investment_decision
data <- add_column(data, diff_GW, .after = "diff_SI")



## Diff Fonds A Before-After 

diff_Fonds_A <- data$revisedinvestment_A - data$investmentdecision_A
data <- add_column(data, diff_Fonds_A, .after = "diff_GW")



## Diff Fonds B Before-After 

diff_Fonds_B <- data$revisedinvestment_B - data$investmentdecision_B
data <- add_column(data, diff_Fonds_B, .after = "diff_Fonds_A")


## Diff Fonds_C Before-After

diff_Fonds_C <- data$revisedinvestment_C - data$investmentdecision_C
data <- add_column(data, diff_Fonds_C, .after = "diff_Fonds_B")

## Diff Fonds_D Before-After

diff_Fonds_D <- data$revisedinvestment_D - data$investmentdecision_D
data <- add_column(data, diff_Fonds_D, .after = "diff_Fonds_C")

## Diff Savings Before-After

diff_Fonds_S <- data$revisedinvestment_S - data$investmentdecision_S
data <- add_column(data, diff_Fonds_S, .after = "diff_Fonds_D")


## No diff in rev and inv

# 1 if all five amounts are identical in the initial and the revised decision,
# 0 otherwise.
no_diff_inv_rev <- ifelse(data$diff_Fonds_A == 0 & data$diff_Fonds_B == 0 & 
                            data$diff_Fonds_C == 0 & data$diff_Fonds_D == 0 & 
                            data$diff_Fonds_S == 0, 1, 0)
data <- add_column(data, no_diff_inv_rev, .after = "diff_Fonds_S")

table(data$no_diff_inv_rev)
# Share of respondents who changed nothing, computed from the data instead of
# hard-coded counts so that it stays correct when the sample changes.
mean(data$no_diff_inv_rev == 1, na.rm = TRUE)



# diff_Fonds_C, diff_SMP, diff_SI and diff_GW are removed further below
# together with the other helper vectors.
rm(diff_Fonds_A, diff_Fonds_B, diff_Fonds_D, diff_Fonds_S, no_diff_inv_rev)

## Assets percent, set NAs to 0 % based on stockmarket_participant question ----------

table(data$assets_percent)
sum(is.na(data$assets_percent))
data$assets_percent[is.na(data$assets_percent)] <- 0
table(data$assets_percent)

## Assets sustainable percent, set NAs to 0 ----------

table(data$assets_sust_perc)
sum(is.na(data$assets_sust_perc))
data$assets_sust_perc[is.na(data$assets_sust_perc)] <- 0
table(data$assets_sust_perc)



## Recode GW Check

# Reverse the 5-point item (1 <-> 5, 2 <-> 4, 3 stays 3).
table(data$check_gw)
reverse_5pt <- setNames(as.character(5:1), as.character(1:5))
data$check_gw <- as.numeric(dplyr::recode(data$check_gw, !!!reverse_5pt))
rm(reverse_5pt)
table(data$check_gw)




## Manipulation check correct --------

table(data$manipcheckgw)

# 1 if fund C was named in the manipulation check, 0 for any other answer.
gw_identified <- as.numeric(data$manipcheckgw == "C")
data <- add_column(data, gw_identified, .after = "manipcheckgw")
table(data$gw_identified)

# Empty answers become missing in both the flag and the answer itself.
no_answer <- which(data$manipcheckgw == "")
data$gw_identified[no_answer] <- NA
data$manipcheckgw[no_answer] <- NA
rm(no_answer)

# Answer "S" is relabelled as "none".
table(data$manipcheckgw)
data$manipcheckgw[which(data$manipcheckgw == "S")] <- "none"
table(data$manipcheckgw)

# 1 if the answer was "none", 0 otherwise; placed right behind manipcheckgw.
data <- add_column(
  data,
  manipcheckgw_none = as.numeric(data$manipcheckgw == "none"),
  .after = "manipcheckgw"
)

table(data$manipcheckgw_none)






## Learning style recode -------------


table(data$learning_videos)
table(data$learning_text)
table(data$learning_quiz)
table(data$learning_not)


# Each option becomes 1 if it holds "Y" and 0 for anything else, including
# missing values.
data <- data %>%
  mutate(
    across(
      c(learning_videos, learning_text, learning_quiz, learning_not),
      ~ as.numeric(.x %in% "Y")
    )
  )



## no income recoded

# Code 10 on hhincome is flagged in its own 0/1 column and then set to missing
# on hhincome itself.
table(data$hhincome)
hhincome_not_reported <- as.numeric(data$hhincome == 10)
data <- add_column(data, hhincome_not_reported, .after = "hhincome")

table(data$hhincome_not_reported)

data$hhincome[which(data$hhincome == 10)] <- NA

table(data$hhincome)
median(data$hhincome, na.rm = TRUE)

# 1 for income categories above 4; rows without an income value end up as 0.
high_income <- as.numeric(data$hhincome > 4)
table(high_income)
data <- add_column(data, high_income, .after = "hhincome")
data$high_income[is.na(data$high_income)] <- 0


mean(data$high_income, na.rm = TRUE)

rm(high_income)

## left wing

# left_wing is pol_right with the 7-point scale reversed (1 <-> 7, ..., 4 stays).
table(data$pol_right)
pol_reversed <- setNames(as.character(7:1), as.character(1:7))
left_wing <- as.numeric(dplyr::recode(data$pol_right, !!!pol_reversed))
rm(pol_reversed)
data <- add_column(data, left_wing, .after = "pol_right")
table(data$left_wing)



# remove vectors
# (helper vectors that were only needed to create columns of `data`)

rm(list = c(
  "alt_values", "bio_values", "diff_Fonds_C", "diff_GW", "diff_SI", "diff_SMP",
  "FL_advanced", "GW_investment_decision", "GW_revised_investment",
  "savings_quote", "savings_quote_revised", "SHI", "SI_investment_decision",
  "SI_revised_investment", "SMI", "SMP_investment_decision",
  "SMP_revised_investment", "hhincome_not_reported",
  "gw_identified", "left_wing", "SFL_weighted",
  "sfl_only_dk", "FLa_only_dk"
))


## checks for recoding --------

# Each check puts a recoded column next to the _old column it was copied from
# for one randnumber; mean and sd of the two columns in a pair must be equal.

# randnumber 1: A <- A, B <- B, C <- C, D <- D
x <- check %>%
  dplyr::filter(randnumber == 1) %>%
  dplyr::select(
    investmentdecision_A, investmentdecision_A_old,
    investmentdecision_B, investmentdecision_B_old,
    investmentdecision_C, investmentdecision_C_old,
    investmentdecision_D, investmentdecision_D_old,
    investmentdecision_S, investmentdecision_S_old,
    revisedinvestment_A, revisedinvestment_A_old,
    revisedinvestment_B, revisedinvestment_B_old,
    revisedinvestment_C, revisedinvestment_C_old,
    revisedinvestment_D, revisedinvestment_D_old,
    revisedinvestment_S, revisedinvestment_S_old,
    manipcheckrating_A, manipcheckrating_A_old,
    manipcheckrating_B, manipcheckrating_B_old,
    manipcheckrating_C, manipcheckrating_C_old,
    manipcheckrating_D, manipcheckrating_D_old,
    manipcheckrating_S, manipcheckrating_S_old
  ) %>%
  get_summary_stats(type = "mean_sd")




# randnumber 2: A <- A, B <- D, C <- C, D <- B
x <- check %>%
  dplyr::filter(randnumber == 2) %>%
  dplyr::select(
    investmentdecision_A, investmentdecision_A_old,
    investmentdecision_B, investmentdecision_D_old,
    investmentdecision_C, investmentdecision_C_old,
    investmentdecision_D, investmentdecision_B_old,
    investmentdecision_S, investmentdecision_S_old,
    revisedinvestment_A, revisedinvestment_A_old,
    revisedinvestment_B, revisedinvestment_D_old,
    revisedinvestment_C, revisedinvestment_C_old,
    revisedinvestment_D, revisedinvestment_B_old,
    revisedinvestment_S, revisedinvestment_S_old,
    manipcheckrating_A, manipcheckrating_A_old,
    manipcheckrating_B, manipcheckrating_D_old,
    manipcheckrating_C, manipcheckrating_C_old,
    manipcheckrating_D, manipcheckrating_B_old,
    manipcheckrating_S, manipcheckrating_S_old
  ) %>%
  get_summary_stats(type = "mean_sd")



# randnumber 3: A <- B, B <- A, C <- D, D <- C
x <- check %>%
  dplyr::filter(randnumber == 3) %>%
  dplyr::select(
    investmentdecision_A, investmentdecision_B_old,
    investmentdecision_B, investmentdecision_A_old,
    investmentdecision_C, investmentdecision_D_old,
    investmentdecision_D, investmentdecision_C_old,
    investmentdecision_S, investmentdecision_S_old,
    revisedinvestment_A, revisedinvestment_B_old,
    revisedinvestment_B, revisedinvestment_A_old,
    revisedinvestment_C, revisedinvestment_D_old,
    revisedinvestment_D, revisedinvestment_C_old,
    revisedinvestment_S, revisedinvestment_S_old,
    manipcheckrating_A, manipcheckrating_B_old,
    manipcheckrating_B, manipcheckrating_A_old,
    manipcheckrating_C, manipcheckrating_D_old,
    manipcheckrating_D, manipcheckrating_C_old,
    manipcheckrating_S, manipcheckrating_S_old
  ) %>%
  get_summary_stats(type = "mean_sd")



# Mean and SD of the investment, revised-investment and manipulation-check
# variables for the rows with randnumber == 4. Each line lists a current
# variable next to the `_old` variable it is compared with.
#
# All three variable groups use the same pairing
# (A-D_old, B-A_old, C-B_old, D-C_old, S-S_old), so every `_old` column is
# selected exactly once. Selecting manipcheckrating_C_old twice would make
# base `[` return a renamed duplicate column and leave
# manipcheckrating_D_old out of the summary.
# NOTE(review): the pairing is taken from the investmentdecision columns of
# this group - confirm it against the randomization scheme.
x <- check %>%
  dplyr::filter(randnumber == 4) %>%
  dplyr::select(
    investmentdecision_A, investmentdecision_D_old,
    investmentdecision_B, investmentdecision_A_old,
    investmentdecision_C, investmentdecision_B_old,
    investmentdecision_D, investmentdecision_C_old,
    investmentdecision_S, investmentdecision_S_old,
    revisedinvestment_A, revisedinvestment_D_old,
    revisedinvestment_B, revisedinvestment_A_old,
    revisedinvestment_C, revisedinvestment_B_old,
    revisedinvestment_D, revisedinvestment_C_old,
    revisedinvestment_S, revisedinvestment_S_old,
    manipcheckrating_A, manipcheckrating_D_old,
    manipcheckrating_B, manipcheckrating_A_old,
    manipcheckrating_C, manipcheckrating_B_old,
    manipcheckrating_D, manipcheckrating_C_old,
    manipcheckrating_S, manipcheckrating_S_old
  ) %>%
  get_summary_stats(type = "mean_sd")




# Mean and SD of the investment, revised-investment and manipulation-check
# variables for the rows with randnumber == 5. Each line lists a current
# variable next to the `_old` variable it is compared with
# (A-C_old, B-B_old, C-A_old, D-D_old, S-S_old).
# NOTE(review): the pairing presumably mirrors the option order of this
# randomization group - confirm against the survey design.
x <- check %>%
  dplyr::filter(randnumber == 5) %>%
  dplyr::select(
    investmentdecision_A, investmentdecision_C_old,
    investmentdecision_B, investmentdecision_B_old,
    investmentdecision_C, investmentdecision_A_old,
    investmentdecision_D, investmentdecision_D_old,
    investmentdecision_S, investmentdecision_S_old,
    revisedinvestment_A, revisedinvestment_C_old,
    revisedinvestment_B, revisedinvestment_B_old,
    revisedinvestment_C, revisedinvestment_A_old,
    revisedinvestment_D, revisedinvestment_D_old,
    revisedinvestment_S, revisedinvestment_S_old,
    manipcheckrating_A, manipcheckrating_C_old,
    manipcheckrating_B, manipcheckrating_B_old,
    manipcheckrating_C, manipcheckrating_A_old,
    manipcheckrating_D, manipcheckrating_D_old,
    manipcheckrating_S, manipcheckrating_S_old
  ) %>%
  get_summary_stats(type = "mean_sd")


# Mean and SD of the investment, revised-investment and manipulation-check
# variables for the rows with randnumber == 6. Each line lists a current
# variable next to the `_old` variable it is compared with
# (A-C_old, B-D_old, C-A_old, D-B_old, S-S_old).
# NOTE(review): the pairing presumably mirrors the option order of this
# randomization group - confirm against the survey design.
x <- check %>%
  dplyr::filter(randnumber == 6) %>%
  dplyr::select(
    investmentdecision_A, investmentdecision_C_old,
    investmentdecision_B, investmentdecision_D_old,
    investmentdecision_C, investmentdecision_A_old,
    investmentdecision_D, investmentdecision_B_old,
    investmentdecision_S, investmentdecision_S_old,
    revisedinvestment_A, revisedinvestment_C_old,
    revisedinvestment_B, revisedinvestment_D_old,
    revisedinvestment_C, revisedinvestment_A_old,
    revisedinvestment_D, revisedinvestment_B_old,
    revisedinvestment_S, revisedinvestment_S_old,
    manipcheckrating_A, manipcheckrating_C_old,
    manipcheckrating_B, manipcheckrating_D_old,
    manipcheckrating_C, manipcheckrating_A_old,
    manipcheckrating_D, manipcheckrating_B_old,
    manipcheckrating_S, manipcheckrating_S_old
  ) %>%
  get_summary_stats(type = "mean_sd")



# Mean and SD of the investment, revised-investment and manipulation-check
# variables for the rows with randnumber == 7. Each line lists a current
# variable next to the `_old` variable it is compared with
# (A-B_old, B-C_old, C-D_old, D-A_old, S-S_old).
# NOTE(review): the pairing presumably mirrors the option order of this
# randomization group - confirm against the survey design.
x <- check %>%
  dplyr::filter(randnumber == 7) %>%
  dplyr::select(
    investmentdecision_A, investmentdecision_B_old,
    investmentdecision_B, investmentdecision_C_old,
    investmentdecision_C, investmentdecision_D_old,
    investmentdecision_D, investmentdecision_A_old,
    investmentdecision_S, investmentdecision_S_old,
    revisedinvestment_A, revisedinvestment_B_old,
    revisedinvestment_B, revisedinvestment_C_old,
    revisedinvestment_C, revisedinvestment_D_old,
    revisedinvestment_D, revisedinvestment_A_old,
    revisedinvestment_S, revisedinvestment_S_old,
    manipcheckrating_A, manipcheckrating_B_old,
    manipcheckrating_B, manipcheckrating_C_old,
    manipcheckrating_C, manipcheckrating_D_old,
    manipcheckrating_D, manipcheckrating_A_old,
    manipcheckrating_S, manipcheckrating_S_old
  ) %>%
  get_summary_stats(type = "mean_sd")



# Mean and SD of the investment, revised-investment and manipulation-check
# variables for the rows with randnumber == 8. Each line lists a current
# variable next to the `_old` variable it is compared with
# (A-D_old, B-C_old, C-B_old, D-A_old, S-S_old).
# NOTE(review): the pairing presumably mirrors the option order of this
# randomization group - confirm against the survey design.
x <- check %>%
  dplyr::filter(randnumber == 8) %>%
  dplyr::select(
    investmentdecision_A, investmentdecision_D_old,
    investmentdecision_B, investmentdecision_C_old,
    investmentdecision_C, investmentdecision_B_old,
    investmentdecision_D, investmentdecision_A_old,
    investmentdecision_S, investmentdecision_S_old,
    revisedinvestment_A, revisedinvestment_D_old,
    revisedinvestment_B, revisedinvestment_C_old,
    revisedinvestment_C, revisedinvestment_B_old,
    revisedinvestment_D, revisedinvestment_A_old,
    revisedinvestment_S, revisedinvestment_S_old,
    manipcheckrating_A, manipcheckrating_D_old,
    manipcheckrating_B, manipcheckrating_C_old,
    manipcheckrating_C, manipcheckrating_B_old,
    manipcheckrating_D, manipcheckrating_A_old,
    manipcheckrating_S, manipcheckrating_S_old
  ) %>%
  get_summary_stats(type = "mean_sd")


# Temporary copy of the randnumber == 4 rows; it is removed again right away
# so that no helper object is left in the workspace.
x <- dplyr::filter(check, randnumber == 4)

rm(list = "x")

# Cross-tabulate manipcheckgw against manipcheckgw_old separately for each
# randnumber group (1 to 8). `%in%` is FALSE for missing randnumber values,
# so rows with an NA randnumber are left out of every table.
table(check$manipcheckgw[check$randnumber %in% 1],
      check$manipcheckgw_old[check$randnumber %in% 1])
table(check$manipcheckgw[check$randnumber %in% 2],
      check$manipcheckgw_old[check$randnumber %in% 2])
table(check$manipcheckgw[check$randnumber %in% 3],
      check$manipcheckgw_old[check$randnumber %in% 3])
table(check$manipcheckgw[check$randnumber %in% 4],
      check$manipcheckgw_old[check$randnumber %in% 4])
table(check$manipcheckgw[check$randnumber %in% 5],
      check$manipcheckgw_old[check$randnumber %in% 5])
table(check$manipcheckgw[check$randnumber %in% 6],
      check$manipcheckgw_old[check$randnumber %in% 6])
table(check$manipcheckgw[check$randnumber %in% 7],
      check$manipcheckgw_old[check$randnumber %in% 7])
table(check$manipcheckgw[check$randnumber %in% 8],
      check$manipcheckgw_old[check$randnumber %in% 8])



## write data for winner drawing -------------

#table(data$email_not_provided)
#lottery_file <- data %>% dplyr::filter(email_not_provided == 0) %>% subset(select = c(talkOnline_ID,
 #                                                                                     email_address,
  #                                                                                    revisedinvestment_A,
   #                                                                                   revisedinvestment_B,
    #                                                                                  revisedinvestment_C,
     #                                                                                 revisedinvestment_D,
      #                                                                                revisedinvestment_S))

#which(duplicated(data$talkOnline_ID) == T)
#data$email_address[which(duplicated(data$email_address) == T & data$email_not_provided == 0)]
#write.xlsx(lottery_file, "lottery_file.xlsx")



# Drop variables that are no longer needed
data <- subset(data, select = -c(attentioncheck1, badquality, pol_right))



# Recode gender from 1/2 to 0/1

# NOTE(review): data_study1 is not created in the visible part of this
# script - confirm that the object exists at this point.
mean(data_study1$gender)
table(data_study1$gender)
# 1 -> 0 has to run before 2 -> 1; otherwise the newly created 1s would be
# turned into 0 as well.
data_study1$gender[data_study1$gender %in% 1] <- 0
data_study1$gender[data_study1$gender %in% 2] <- 1



# Same recoding for the working data set; the mean is taken after recoding.
table(data$gender)
data$gender[data$gender %in% 1] <- 0
data$gender[data$gender %in% 2] <- 1
mean(data$gender)


# Frequency tables of the three attention-check failure flags
table(data_study1$attention_check_1_failed)
table(data_study1$attention_check_2_failed)
table(data_study1$attention_check_3_failed)

# Remove helper objects that are not needed any more
rm(list = c("check", "data_incomplete", "timings"))








